import pandas as pd

# ========== Configuration ==========
# Input workbook (prompt + 方案1).
input_path = r"C:\Users\Lenovo\Desktop\000\方案\无结构验证\02\融合dpo模型生成评估数据的方案.xlsx"
# Output workbook.
output_path = r"C:\Users\Lenovo\Desktop\000\价值维度匹配分析\模型生成方案维度占比\无结构的02\专家维度_dpo融合.xlsx"
# ===================================

# ====== Dimension -> keyword dictionary ======
# (Example: the public version; swap in the expert version if needed.)
# Each key is a dimension name; each value lists the keywords that indicate it.
dim_dict = {
    "宜居环境与生活品质": [
        "生活", "休闲", "居住", "宜居", "生活品质",
    ],
    "城市更新与系统优化": [
        "发展", "改造", "加强", "配套", "功能", "品质", "集聚", "完善",
        "推动", "活力", "整合", "绿化", "科技", "核心", "融合", "水平",
        "利用效率", "引导", "促进", "资源", "强化", "结合", "服务", "保护",
        "提供", "利用", "引入", "目标", "需求",
    ],
    "空间利用与人居环境": [
        "用地", "文化", "区域", "设施", "空间", "生态", "城市", "居民",
        "园区", "基础设施", "土地", "周边", "社区", "历史", "环境", "片区",
        "公共设施", "交通", "公共", "公共服务", "空间布局", "土地利用",
        "街区", "定位",
    ],
    "品质提升与设施改善": [
        "提升", "优化", "提高", "升级", "增加", "更新", "改善",
    ],
    "产业经济与结构优化": [
        "产业", "商业", "企业", "创新", "产业结构", "工业",
    ],
    "战略规划与发展思路": [
        "规划", "思路", "创意", "逻辑",
    ],
    "建设导向与空间塑造": [
        "打造", "建设", "构建", "建筑",
    ],
}
# 1. Load the input workbook into a DataFrame
df = pd.read_excel(io=input_path)

# 2. Tag each plan with the dimensions it matches
def match_dimensions(text, dim_dict):
    """Return the dimensions whose keyword list has at least one hit in *text*.

    Matching is a plain substring test: a dimension is reported as soon as
    any one of its keywords occurs anywhere in the text.

    Args:
        text: The plan text to scan. Non-string values are converted with
            ``str``. ``None`` and float NaN (how pandas typically represents
            an empty cell) are treated as "no text" and match nothing.
        dim_dict: Mapping of dimension name -> iterable of keyword strings.

    Returns:
        A list of dimension names in ``dim_dict`` iteration order. Each
        dimension appears at most once, however many of its keywords hit.
    """
    # Missing values would otherwise be stringified to "None" / "nan" and
    # could spuriously match Latin-letter keywords. NaN is the only float
    # that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return []

    content = str(text)  # convert once rather than once per keyword
    return [
        dim
        for dim, keywords in dim_dict.items()
        # any() stops at the first hit, so a dimension is counted only once.
        if any(kw in content for kw in keywords)
    ]

# Run the matcher on every value of column "方案1"; dim_dict is forwarded
# to match_dimensions as its second positional argument via ``args``.
df["匹配维度"] = df["方案1"].apply(match_dimensions, args=(dim_dict,))

# 3. Tally the dimension distribution
# Flatten the per-row lists of matched dimensions into one flat list.
all_dims = [dim for row_dims in df["匹配维度"] for dim in row_dims]

# One row per dimension together with the number of rows that matched it.
dim_count = pd.Series(all_dims).value_counts().reset_index()
dim_count.columns = ["维度", "出现次数"]

# Percentage share of each dimension among all matched-dimension occurrences
# (the denominator is the total number of matches, not the number of rows).
total_hits = dim_count["出现次数"].sum()
dim_count["占比(%)"] = dim_count["出现次数"] / total_hits * 100

# 4. Save the results
# Sheet name -> frame, written in this order into a single workbook.
sheets = {
    "逐条结果": df,         # per-row results, including the matched dimensions
    "维度统计": dim_count,  # aggregated counts and percentages
}
with pd.ExcelWriter(output_path) as writer:
    for sheet_name, frame in sheets.items():
        frame.to_excel(writer, sheet_name=sheet_name, index=False)

print("完成 ✅ 结果已保存到：", output_path)
